# If you are highly active on Facebook and Instagram, you must have seen ads based on elections by any of the political parties,
# especially BJP and INC. All the parties in India usually spend a lot of money on election campaigns. I recently collected data
# from Meta ads about how much money was spent on Instagram and Facebook ads by each political party during the Indian elections
# 2024 in each state. In this article, we perform an election ad spending analysis using Python
# and examine how the spending relates to voting patterns.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Directory holding the three input CSVs. Edit this single path to run the
# notebook against a different copy of the data.
from pathlib import Path

DATA_DIR = Path(r"D:\Testdirectory\Elections AD Spend Analysis")

# results: one row per parliamentary constituency (electors, turnout, phase).
# advertisers: one row per advertiser page (amount spent, number of ads).
# locations: one row per location (amount spent).
results = pd.read_csv(DATA_DIR / "results.csv")
advertisers = pd.read_csv(DATA_DIR / "advertisers.csv")
locations = pd.read_csv(DATA_DIR / "locations.csv")
results.head(5)
| _id | Sl No | State | PC_Name | Total Electors | Polled (%) | Total Votes | Phase | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1.0 | Andaman & Nicobar Islands | Andaman & Nicobar Islands | 315148 | 64.10 | 202018 | 1.0 |
| 1 | 2 | 2.0 | Arunachal Pradesh | Arunachal East | 375310 | 83.31 | 312658 | 1.0 |
| 2 | 3 | 3.0 | Arunachal Pradesh | Arunachal West | 517384 | 73.60 | 380783 | 1.0 |
| 3 | 4 | 4.0 | Assam | Dibrugarh | 1659588 | 76.75 | 1273744 | 1.0 |
| 4 | 5 | 5.0 | Assam | Jorhat | 1727121 | 79.89 | 1379749 | 1.0 |
advertisers.head(5)
| Page ID | Page name | Disclaimer | Amount spent (INR) | Number of ads in Library | |
|---|---|---|---|---|---|
| 0 | 121439954563203 | Bharatiya Janata Party (BJP) | Bharatiya Janata Party (BJP) | 193854342 | 43455 |
| 1 | 351616078284404 | Indian National Congress | Indian National Congress | 108787100 | 846 |
| 2 | 132715103269897 | Ama Chinha Sankha Chinha | Ama Chinha Sankha Chinha | 73361399 | 1799 |
| 3 | 192856493908290 | Ama Chinha Sankha Chinha | Ama Chinha Sankha Chinha | 32294327 | 680 |
| 4 | 109470364774303 | Ellorum Nammudan | Populus Empowerment Network Private Limited | 22399499 | 879 |
locations.head(5)
| Location name | Amount spent (INR) | |
|---|---|---|
| 0 | Andaman and Nicobar Islands | 377858 |
| 1 | Andhra Pradesh | 100819732 |
| 2 | Arunachal Pradesh | 1385654 |
| 3 | Assam | 17478091 |
| 4 | Bihar | 53619242 |
# The advertisers data contains:
# Page ID: A unique identifier for the advertiser’s page.
# Page name: The name of the advertiser’s page (usually the party or campaign running the ads).
# Disclaimer: Information about the advertiser, typically who paid for the ads.
# Amount spent (INR): The total amount of money spent on ads in Indian Rupees.
# Number of ads in Library: The number of ads associated with the advertiser.
# The locations data contains:
# Location name: The name of the location.
# Amount spent (INR): The total amount of money spent on ads in that location in Indian Rupees.
# The results data contains:
# _id: A unique identifier for the entry.
# Sl No: Serial number.
# State: The name of the state.
# PC_Name: The name of the parliamentary constituency.
# Total Electors: The total number of registered voters.
# Polled (%): The percentage of votes polled.
# Total Votes: The total number of votes cast.
# Phase: The phase of the election.
results.isnull().sum()
_id 0 Sl No 7 State 7 PC_Name 0 Total Electors 0 Polled (%) 0 Total Votes 0 Phase 58 dtype: int64
advertisers.isnull().sum()
Page ID 0 Page name 0 Disclaimer 0 Amount spent (INR) 0 Number of ads in Library 0 dtype: int64
locations.isnull().sum()
Location name 0 Amount spent (INR) 0 dtype: int64
# Phase has 58 null values; Sl No and State have 7 null values each.
sns.heatmap(results.isnull())
<Axes: >
results.shape, advertisers.shape, locations.shape
((550, 8), (20832, 5), (36, 2))
results[results.duplicated()]
| _id | Sl No | State | PC_Name | Total Electors | Polled (%) | Total Votes | Phase |
|---|
advertisers[advertisers.duplicated()]
| Page ID | Page name | Disclaimer | Amount spent (INR) | Number of ads in Library |
|---|
locations[locations.duplicated()]
| Location name | Amount spent (INR) |
|---|
# no duplicate values
def _normalize_state_name(names):
    """Return a string Series of state names in a canonical form for joining.

    The two files spell some names differently: results uses
    "Andaman & Nicobar Islands" while locations uses
    "Andaman and Nicobar Islands", and at least one results value carries an
    embedded line break. Lower-casing and stripping alone leaves those rows
    unmatched, so "&" is also rewritten to "and" and every run of whitespace
    is collapsed to a single space. Missing values stay missing.
    """
    return (
        names.str.lower()
        .str.replace(r'\s*&\s*', ' and ', regex=True)
        .str.replace(r'\s+', ' ', regex=True)
        .str.strip()
    )


# Bring the join keys of both tables into the same canonical spelling.
results['State'] = _normalize_state_name(results['State'])
locations['Location name'] = _normalize_state_name(locations['Location name'])

# Left join: keep every constituency row and attach the state-level ad spend
# where a location with the same normalized name exists (NaN otherwise).
merged_data = results.merge(locations, left_on='State', right_on='Location name', how='left')
merged_data.sample(5)
| _id | Sl No | State | PC_Name | Total Electors | Polled (%) | Total Votes | Phase | Location name | Amount spent (INR) | |
|---|---|---|---|---|---|---|---|---|---|---|
| 129 | 130 | 27.0 | karnataka | Tumkur | 1661309 | 78.05 | 1296720 | 2.0 | karnataka | 41659397.0 |
| 516 | 517 | 25.0 | punjab | Bathinda | 1651188 | 69.36 | 1145241 | NaN | NaN | NaN |
| 296 | 297 | 11.0 | andhra pradesh | Kakinada | 1634122 | 80.30 | 1312255 | 4.0 | andhra pradesh | 100819732.0 |
| 240 | 241 | 49.0 | karnataka | Bidar | 1892962 | 65.47 | 1239358 | 3.0 | karnataka | 41659397.0 |
| 519 | 520 | 28.0 | punjab | Firozpur | 1670008 | 67.02 | 1119167 | NaN | NaN | NaN |
# A new DataFrame merged_data is created that contains all rows from results with corresponding rows from locations
# where the 'State' column in results matches the 'Location name' column in locations
merged_data.isnull().sum()
_id 0 Sl No 7 State 7 PC_Name 0 Total Electors 0 Polled (%) 0 Total Votes 0 Phase 58 Location name 31 Amount spent (INR) 31 dtype: int64
merged_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 550 entries, 0 to 549 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 _id 550 non-null int64 1 Sl No 543 non-null float64 2 State 543 non-null object 3 PC_Name 550 non-null object 4 Total Electors 550 non-null int64 5 Polled (%) 550 non-null float64 6 Total Votes 550 non-null int64 7 Phase 492 non-null float64 8 Location name 519 non-null object 9 Amount spent (INR) 519 non-null float64 dtypes: float64(4), int64(3), object(3) memory usage: 43.1+ KB
sns.heatmap(merged_data.isnull())
<Axes: >
# importing required libraries
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "plotly_white"
# data preparation
# 'Amount spent (INR)' in merged_data is a state-level figure that the merge
# repeated on every constituency row of that state. Summing it would multiply
# each state's spend by its number of constituencies, so take the single
# per-state value instead. States with no matched spend stay NaN rather than
# being reported as 0.
state_ad_spent = merged_data.groupby('State')['Amount spent (INR)'].first().reset_index()
state_ad_spent.sample(5)
| State | Amount spent (INR) | |
|---|---|---|
| 28 | rajasthan | 1.031708e+09 |
| 17 | lakshadweep | 1.984700e+04 |
| 34 | uttarakhand | 3.570448e+07 |
| 19 | maharashtra | 4.892020e+09 |
| 35 | west bengal | 3.244290e+09 |
# Bar chart with one bar per state, height = that state's ad spend.
fig = px.bar(
    data_frame=state_ad_spent,
    x='State',
    y='Amount spent (INR)',
    title='Total AD Spend by State',
    labels={'State': 'State', 'Amount spent (INR)': 'Amount spend (INR)'},
)
# Sort bars from largest to smallest and turn the state names vertical so
# they do not overlap; fix the figure size.
fig.update_layout(
    xaxis={'categoryorder': 'total descending', 'tickangle': -90},
    width=800,
    height=600,
)
# The bar graph shows the total ad spend (in INR) by state. Uttar Pradesh leads significantly with the highest ad spend,
# followed by Maharashtra and Odisha. States like West Bengal, Tamil Nadu, Andhra Pradesh, and Bihar also show
# substantial ad expenditures. In contrast, states such as Lakshadweep, Dadra & Nagar Haveli, Daman & Diu,
# Andaman & Nicobar Islands, and Arunachal Pradesh have the lowest ad spend. It indicates that larger and more populous
# states tend to spend more on ads, likely reflecting their greater political significance and larger voter base.
state_ad_spent['Amount spent (INR)'].describe()
count 3.600000e+01 mean 9.360097e+08 std 1.645426e+09 min 0.000000e+00 25% 1.710248e+06 50% 4.187547e+07 75% 1.063730e+09 max 7.173450e+09 Name: Amount spent (INR), dtype: float64
# Add up the per-state figures into one overall number and report it.
spend_by_state = state_ad_spent['Amount spent (INR)']
total_ad_spent = spend_by_state.sum()
print(f"Total Ad Spent: {total_ad_spent} INR")
Total Ad Spent: 33696348101.0 INR
# Average the constituency-level turnout percentages within each state,
# keeping 'State' as an ordinary column.
state_voter_turnout = merged_data.groupby('State', as_index=False)['Polled (%)'].mean()
state_voter_turnout.sample(5)
| State | Polled (%) | |
|---|---|---|
| 6 | chhattisgarh | 73.105455 |
| 1 | andhra pradesh | 80.770800 |
| 7 | dadra & nagar haveli and\ndaman & diu | 70.645000 |
| 35 | west bengal | 79.188095 |
| 28 | rajasthan | 61.426400 |
# Bar chart with one bar per state, height = mean turnout percentage.
turnout_labels = {'State': 'State', 'Polled (%)': 'Voter Turnout (%)'}
fig = px.bar(
    state_voter_turnout,
    x='State',
    y='Polled (%)',
    title='Average Voter Turnout by State',
    labels=turnout_labels,
)
# Highest-turnout states first, vertical tick labels, fixed figure size.
fig.update_layout(
    xaxis={'categoryorder': 'total descending', 'tickangle': -90},
    width=800,
    height=600,
)
# Lakshadweep has the highest average voter turnout at nearly 80%, followed closely by Tripura and Assam.
# States like Andhra Pradesh, Sikkim, and West Bengal also show high voter engagement, with turnouts above 70%.
# On the other end of the spectrum, states such as Bihar, Uttar Pradesh, and Uttarakhand have the lowest average voter turnout,
# around 50-55%. It indicates significant regional variations in voter participation, with some smaller states
# and union territories exhibiting higher engagement compared to larger states with higher ad spend.
# Clean the advertiser table:
#   1. force the spend column to numeric (unparseable entries become NaN),
#   2. discard rows whose spend could not be parsed,
#   3. expose the page name under the column name 'Party name'.
advertisers['Amount spent (INR)'] = pd.to_numeric(
    advertisers['Amount spent (INR)'], errors='coerce'
)
advertisers = (
    advertisers
    .dropna(subset=['Amount spent (INR)'])
    .rename(columns={'Page name': 'Party name'})
)
advertisers.head(5)
| Page ID | Party name | Disclaimer | Amount spent (INR) | Number of ads in Library | |
|---|---|---|---|---|---|
| 0 | 121439954563203 | Bharatiya Janata Party (BJP) | Bharatiya Janata Party (BJP) | 193854342.0 | 43455 |
| 1 | 351616078284404 | Indian National Congress | Indian National Congress | 108787100.0 | 846 |
| 2 | 132715103269897 | Ama Chinha Sankha Chinha | Ama Chinha Sankha Chinha | 73361399.0 | 1799 |
| 3 | 192856493908290 | Ama Chinha Sankha Chinha | Ama Chinha Sankha Chinha | 32294327.0 | 680 |
| 4 | 109470364774303 | Ellorum Nammudan | Populus Empowerment Network Private Limited | 22399499.0 | 879 |
# Total spend per 'Party name', ordered from the biggest spender down.
spend_per_party = advertisers.groupby('Party name')['Amount spent (INR)'].sum()
party_ad_spent = spend_per_party.sort_values(ascending=False)

# Keep the five largest totals as a two-column frame for plotting.
top_5_parties = party_ad_spent.iloc[:5].reset_index()
top_5_parties
| Party name | Amount spent (INR) | |
|---|---|---|
| 0 | Bharatiya Janata Party (BJP) | 193854342.0 |
| 1 | Ama Chinha Sankha Chinha | 112412941.0 |
| 2 | Indian National Congress | 108787100.0 |
| 3 | Ellorum Nammudan | 23806041.0 |
| 4 | BJP Odisha | 19573782.0 |
# Pie chart of the five largest spenders; each slice is that page's share of
# the combined spend of these five.
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0']
fig = px.pie(
    top_5_parties,
    names='Party name',
    values='Amount spent (INR)',
    color_discrete_sequence=colors,
    title='Top 5 Parties by Ad Spend',
    labels={'Party name': 'Political Party', 'Amount spent (INR)': 'Ad Spend (INR)'},
)
# Show only the percentage on each slice.
fig.update_traces(textinfo='percent')
# Vertical legend placed left of the pie, centred title, and a wide left
# margin so the legend has room.
fig.update_layout(
    showlegend=True,
    legend={'orientation': "v", 'yanchor': "top", 'y': 1, 'xanchor': "left", 'x': -0.3},
    title={'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    margin={'l': 200, 'r': 50, 't': 100, 'b': 50},
)
# Among the top five pages by ad spend, the Bharatiya Janata Party (BJP) has the highest spend, accounting for 42.3% of
# their combined total. This is followed by the Ama Chinha Sankha Chinha party at 24.5% and the Indian National Congress
# at 23.7%. Ellorum Nammudan and BJP Odisha have significantly lower ad spends, at 5.19% and 4.27%, respectively. It
# indicates that BJP dominates in terms of ad spending on Facebook and Instagram ads, with a little over two-fifths of the
# top-five expenditure, suggesting a significant investment in advertising compared to other parties.
# Pairwise correlation between the ad spend attached to each constituency row
# and that constituency's turnout percentage.
spend_and_turnout = merged_data[['Amount spent (INR)', 'Polled (%)']]
correlation = spend_and_turnout.corr()
print(correlation)
Amount spent (INR) Polled (%) Amount spent (INR) 1.000000 -0.010688 Polled (%) -0.010688 1.000000
# Draw the 2x2 correlation matrix as an annotated heatmap on a fixed -1..1
# colour scale.
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(correlation, vmin=-1, vmax=1, cmap='coolwarm', annot=True)
heatmap_ax.set_title('Correlation Matrix')
plt.show()
# The correlation matrix shows that the relationship between the amount spent (INR) and the percentage of votes polled (%) is
# very weak and slightly negative, with a correlation coefficient of -0.010688. This indicates that there is virtually no
# linear relationship between ad spend and voter turnout. In other words, increasing the amount spent on advertising does not
# significantly affect the percentage of voter turnout.
# The correlation coefficient ranges from -1 to 1.
# A coefficient close to 1 indicates a strong positive correlation (as one variable increases, the other tends to increase).
# A coefficient close to -1 indicates a strong negative correlation (as one variable increases, the other tends to decrease).
# A coefficient close to 0 indicates no linear correlation between the variables.
# Scatter of ad spend against turnout, one point per constituency row,
# coloured by state.
scatter_labels = {'Amount spent (INR)': 'Ad Spend (INR)', 'Polled (%)': 'Voter Turnout (%)'}
fig = px.scatter(
    merged_data,
    x='Amount spent (INR)',
    y='Polled (%)',
    color='State',
    title='Ad Spend and Voter Turnout by Parliamentary Constituency',
    labels=scatter_labels,
)
fig.update_layout(height=600, width=800)
fig.show()
D:\ANACONDA\lib\site-packages\plotly\express\_core.py:1979: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
# It shows that higher ad spending does not necessarily correlate with higher voter turnout. Voter turnout seems to cluster
# between 60% and 80% across most constituencies, regardless of the ad spend amount, which ranges from 0 to 150 million INR.
# This suggests that other factors besides ad spend may play a significant role in influencing voter turnout.
# Histogram (30 bins) of the ad-spend value attached to each constituency row,
# with a box plot drawn in the margin above it.
# `x` is passed as a column name rather than a one-element list: a list puts
# Plotly Express into wide-form mode, which relabels the axis "value", adds a
# "variable" legend and ignores the `labels` mapping below.
fig = px.histogram(
    merged_data,
    x='Amount spent (INR)',
    nbins=30,
    marginal='box',
    labels={'Amount spent (INR)': 'Amount spend (INR)'},
    title='Distribution of AD Spending',
)
# Thin black outline so neighbouring bars are distinguishable.
fig.update_traces(marker=dict(line=dict(color='black', width=1)))
fig.update_layout(bargap=0.1, width=800, height=600)
fig.show()
# The histogram indicates that most constituencies have ad spends clustered around the 50M and 100M INR marks, with fewer
# constituencies spending less than 10M INR or more than 150M INR. The box plot highlights that the median ad spend is around
# 70M INR, with the interquartile range (IQR) spanning from approximately 30M to 110M INR. There are a few outliers,
# particularly a constituency with an exceptionally high ad spend above 150M INR. This distribution suggests that while the
# majority of ad spends are concentrated within a certain range, there are notable exceptions with significantly higher
# expenditures.
import plotly.graph_objects as go
# data preparation
# Per election phase: total of the ad-spend column and mean turnout percentage.
# NOTE(review): 'Amount spent (INR)' is a state-level figure repeated on every
# constituency row of merged_data, so this sum counts a state's spend once per
# constituency voting in the phase. Read the bar heights as a relative
# indicator, not as rupees actually spent in the phase; confirm this is intended.
phase_analysis = merged_data.groupby('Phase').agg({'Amount spent (INR)': 'sum', 'Polled (%)': 'mean'}).reset_index()

fig = go.Figure()

# Add Bar trace for Ad Spend (INR) on the primary (left) y-axis
fig.add_trace(go.Bar(
    x=phase_analysis['Phase'],
    y=phase_analysis['Amount spent (INR)'],
    name='Ad Spend (INR)',
    marker_color='indianred',
    yaxis='y1'
))

# Add Scatter trace for Voter Turnout (%) on the secondary (right) y-axis
fig.add_trace(go.Scatter(
    x=phase_analysis['Phase'],
    y=phase_analysis['Polled (%)'],
    name='Voter Turnout (%)',
    marker_color='lightsalmon',
    yaxis='y2'
))

# Update layout of the figure.
# Axis title colours are set through title=dict(text=..., font=...); the older
# `titlefont` shortcut is deprecated and rejected by newer Plotly releases.
fig.update_layout(
    title='Ad Spend and Voter Turnout by Election Phase',
    xaxis=dict(title='Election Phase'),
    yaxis=dict(
        title=dict(text='Ad Spend (INR)', font=dict(color='indianred')),
        tickfont=dict(color='indianred')
    ),
    yaxis2=dict(
        title=dict(text='Voter Turnout (%)', font=dict(color='lightsalmon')),
        tickfont=dict(color='lightsalmon'),
        overlaying='y',  # draw on top of the primary axis, sharing the x-axis
        side='right'
    ),
    legend=dict(x=0.1, y=1.1, orientation='h'),
    width=800,
    height=600
)

fig.show()
# There is no consistent trend between ad spend and voter turnout. Election phases 1 and 4 have the highest ad spends, with phase
# 4 peaking in voter turnout at around 70%. However, phase 1, despite high ad spend, has a lower voter turnout of about 67%.
# Phases with moderate ad spend (e.g., 2 and 6) have lower voter turnout, while phase 5 has a notably low turnout despite
# moderate spending.
# Overall, the analyses indicate that higher ad spend does not guarantee higher voter turnout and voter engagement is influenced
# by various other factors. Larger and more significant states tend to spend more on ads, but this does not necessarily
# translate to higher voter participation. Political parties, particularly the BJP, invest heavily in advertising,
# yet the effectiveness of this spending in increasing voter turnout is questionable